Load Identification via Recurrence Plots

  • Article: ...
  • URL: ...
  • Source-code: ...
  • Proposed strategy: convert the time series into a RECURRENCE PLOT, extract features with a DNN (VGG16) and apply supervised classification (sketched below).
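At a high level, the pipeline chains three off-the-shelf pieces: pyts for the recurrence plots, a pre-trained VGG16 as a frozen feature extractor, and a scikit-learn classifier on top. A minimal sketch with dummy data (illustrative names, not the notebook's actual variables):

# Sketch of the proposed pipeline: series -> recurrence plot -> VGG16 embedding -> classifier
import numpy as np
from pyts.image import RecurrencePlot
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from sklearn.tree import DecisionTreeClassifier

series = np.random.rand(10, 50)                   # 10 dummy chunks of 50 power samples
rps = RecurrencePlot().fit_transform(series)      # (10, 50, 50) recurrence plots
rgb = np.repeat(rps[..., None], 3, axis=-1)       # replicate channel: VGG16 expects RGB input
vgg = VGG16(include_top=False, weights='imagenet', pooling='avg', input_shape=(50, 50, 3))
X = vgg.predict(preprocess_input(rgb * 255.0))    # (10, 512) feature vectors
y = np.random.randint(0, 2, size=10)              # dummy ON/OFF labels
clf = DecisionTreeClassifier().fit(X, y)          # supervised classification step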

Loading environment and parameters


In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
plt.rc('text', usetex=False)
from matplotlib.image import imsave
import pandas as pd
import pickle as cPickle
import os, sys, cv2
from math import *
from pprint import pprint
from tqdm import tqdm_notebook
from mpl_toolkits.axes_grid1 import make_axes_locatable
from PIL import Image
from glob import glob
from IPython.display import display

from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing import image as keras_image
from tensorflow.keras.applications.vgg16 import preprocess_input

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

from pyts.image import RecurrencePlot

REDD_RESOURCES_PATH = 'datasets/REDD'
BENCHMARKING2_RESOURCES_PATH = 'benchmarkings/Imaging-NILM-time-series/'

HYPOTHESIS_RESOURCES_PATH = 'datasets/hipotese1-recurrenceplot-vggembedding/'

sys.path.append(os.path.join(BENCHMARKING2_RESOURCES_PATH, ''))

from serie2QMlib import *

Data pre-processing


In [2]:
# Standardize data to zero mean and unit variance
def standardize(serie):
    dev = np.sqrt(np.var(serie))
    mean = np.mean(serie)
    return [(each - mean) / dev for each in serie]

# Rescale data into [0,1]
def rescale(serie):
    maxval = max(serie)
    minval = min(serie)
    gap = float(maxval - minval)
    return [(each - minval) / gap for each in serie]

#modified from https://stackoverflow.com/questions/33650371/recurrence-plot-in-python
from sklearn.metrics import pairwise
def recurrence_plot(s, eps=None, steps=None, scaling = False):
    # Rescale first, while s is still a plain series, then reshape for pairwise_distances
    if scaling:
        s = rescale(s)
    if type(s) == list:
        s = np.array(s)[:, None]
    if eps is None: eps = 0.1
    if steps is None: steps = 10
    d = pairwise.pairwise_distances(s)  # pairwise (Euclidean) distances between time points
    d = np.floor(d / eps)               # quantize distances into bins of width eps
    d[d > steps] = steps                # saturate: at most steps + 1 distinct levels
    return d
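For instance, on a short sine wave the function yields a quantized distance matrix with at most steps + 1 distinct levels. A quick sanity check (illustrative, not part of the original study):

# Sanity check: recurrence_plot on a synthetic series
t = np.linspace(0, 4 * np.pi, 200)
rp = recurrence_plot(list(np.sin(t)), eps=0.1, steps=10)
print(rp.shape)            # (200, 200)
print(np.unique(rp).size)  # at most 11 quantization levels
plt.imshow(rp, origin="lower");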

General parameters of the data used for modeling (training and test)


In [3]:
#################################
###Define the parameters here####
#################################

#datafiles = ['dish washer1-1']  # Data file name (TODO: change here)
#trains = [250]  # Number of training instances (because we assume training and test data are mixed in one file)
size = [32]  # PAA size
#GAF_type = 'GADF'  # GAF type: GASF, GADF
#save_PAA = True  # Save the GAF with or without dimension reduction by PAA: True, False
#rescale_type = 'Zero'  # Rescale the data into [0,1] or [-1,1]: Zero, Minusone

directory = os.path.join(HYPOTHESIS_RESOURCES_PATH, '') #the directory will be created if it does not already exist. Here the images will be stored
if not os.path.exists(directory):
    os.makedirs(directory)

Generating the data

To keep the benchmarks comparable, the series data from benchmark 1 will be used for the Feature Extraction step (the serie2image conversion from benchmark 2).

Feature Extraction


In [4]:
def serie2RP(serie, scaling = False, s = 32):
    """
    Customized function to perform Series to Image conversion.

    Args:
        serie    : original input data (time-series chunk of appliance/main data - REDD - benchmark 1)
        scaling  : rescale the series into [0, 1] before building the plot
        s        : size of the PAA-reduced output image [ INFO: PAA = (32, 32) / noPAA = (50, 50) ]
    """
    std_data = serie
    if scaling:
        std_data = rescale(std_data)

    # Full-resolution recurrence plot of the (optionally rescaled) chunk
    image = recurrence_plot(std_data)

    # Dimension-reduced variant: PAA first, then the recurrence plot
    # (paa() comes from serie2QMlib, imported above from benchmark 2)
    paaimage = recurrence_plot(paa(std_data, s, None))

    return image, paaimage
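Usage then looks like this (illustrative; assumes paa() from serie2QMlib follows the paa(series, size, None) call pattern seen in the benchmark 2 code):

# Illustrative call on a synthetic chunk
dummy_chunk = list(np.sin(np.linspace(0, 24, 50)))
image, paaimage = serie2RP(dummy_chunk, scaling=True, s=32)
print(image.shape, paaimage.shape)  # (50, 50) and (32, 32)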

In [11]:
# Reading power dataset (benchmark 1)
BENCHMARKING1_RESOURCES_PATH = "benchmarkings/cs446 project-electric-load-identification-using-machine-learning/"

size_paa = 32
size_without_paa = 30

# devices to be used in training and testing
use_idx = np.array([3,4,6,7,10,11,13,17,19])

label_columns_idx = ["APPLIANCE_{}".format(i) for i in use_idx]

Training Set


In [7]:
# Train...
train_power_chunks = np.load( os.path.join(BENCHMARKING1_RESOURCES_PATH, 'datasets/train_power_chunks.npy') )
train_labels_binary = np.load( os.path.join(BENCHMARKING1_RESOURCES_PATH, 'datasets/train_labels_binary.npy') )

# Testing the serie -> RP conversion function
# serie = list(np.sin(np.linspace(0,24,1000)))
serie = train_power_chunks[22, :].tolist() 
image = RecurrencePlot().fit_transform([serie])[0]

# Visualizing Serie/RP side by side
fig = plt.figure(figsize=(15,5))
ax = fig.add_subplot(1, 2, 1)
ax.imshow(image, origin="lower", aspect="auto")
ax = fig.add_subplot(1, 2, 2)
ax.plot(range(len(serie)), serie)
fig.tight_layout() # Or equivalently,  "plt.tight_layout()"



In [11]:
print("Processing train dataset (Series to Images)...")

image_size_px = 32
image_size_inch = round(image_size_px / 71, 2) # Convert px to inch

fig = plt.figure(frameon=False)
fig.set_size_inches(image_size_inch, image_size_inch) # 0.45inch = 32px

rp_series_train = []

# Serie -> RP
X_rp = RecurrencePlot().fit_transform(train_power_chunks)

#for idx, row in tqdm_notebook(df_power_chunks.iterrows(), total = df_power_chunks.shape[0]):
for idx, power_chunk in tqdm_notebook(enumerate(train_power_chunks), total = train_power_chunks.shape[0]):

    serie = power_chunk
    image = X_rp[idx]
    labels = train_labels_binary[idx, :].astype('str').tolist()
    labels_str = ''.join(labels)
 
    # Persist the raw RP matrix (.npy)
    np.save(
        os.path.join( 
            HYPOTHESIS_RESOURCES_PATH, 
            "{}_{}_train.npy".format(idx, labels_str) 
        ), 
        image
    )
    # Save the colored RP image (matplotlib's default colormap)
    imsave(
        os.path.join( 
            HYPOTHESIS_RESOURCES_PATH, 
            "{}_{}_train_color.png".format(idx, labels_str) 
        ), 
        arr=image
    )
#     ax = plt.Axes(fig, [0., 0., 1., 1.])
#     ax.set_axis_off()
#     fig.add_axes(ax)
#     ax.imshow(image, aspect='auto')
#     fig.savefig(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_train_color.png".format(idx, labels_str) ))
    # Clip and cast to 8-bit first: PIL's fromarray cannot handle a raw float64 array
    Image.fromarray((image * 255).clip(0, 255).astype(np.uint8)).convert('RGB').resize((image_size_px, image_size_px)).save(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_train_black.png".format(idx, labels_str) ))
    rp_series_train.append( list([idx]) + list(image.flatten()) + list(labels) )
    
# Visualizing some results...
plt.figure(figsize=(8,6));

ax1 = plt.subplot(121);
plt.title("RP - RGB");
color_rp = plt.imread(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_train_color.png".format(idx, labels_str) ))
plt.imshow(color_rp, origin="lower");

divider = make_axes_locatable(ax1);
cax = divider.append_axes("right", size="2.5%", pad=0.2);
plt.colorbar(cax=cax);

ax2 = plt.subplot(122);
black_rp = plt.imread(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_train_black.png".format(idx, labels_str) ))
plt.title("RP - BlackWhite");
plt.imshow(black_rp, origin="lower");

print('Saving processed data...')
df_rp_train = pd.DataFrame(
    data = rp_series_train,
    columns = list(["IDX"]) + ["DIMESION_{}".format(d) for d in range(len(image.flatten()))] + list(label_columns_idx)
)
df_rp_train.to_csv(os.path.join( HYPOTHESIS_RESOURCES_PATH, "df_rp_train.csv"))


Processing train dataset (Series to Images)...
Saving processed data...
<Figure size 32.4x32.4 with 0 Axes>
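Note how the ON/OFF label vector is serialized directly into each file name ("{idx}_{labels_str}_train.*"); embedding_images later parses it back. A round-trip sketch (illustrative values):

# Label-in-filename encoding used above, and its inverse
labels = np.array([1, 0, 0, 1, 0, 0, 0, 1, 0])  # ON/OFF states of the 9 appliances
labels_str = ''.join(labels.astype('str'))      # -> "100100010"
recovered = [int(c) for c in "42_100100010_train".split("_")[1]]
assert recovered == labels.tolist()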

Test Set


In [12]:
# Test...
test_power_chunks = np.load( os.path.join(BENCHMARKING1_RESOURCES_PATH, 'datasets/test_power_chunks.npy') )
test_labels_binary = np.load( os.path.join(BENCHMARKING1_RESOURCES_PATH, 'datasets/test_labels_binary.npy') )

# Testing the serie -> RP conversion function
# serie = list(np.sin(np.linspace(0,24,1000)))
serie = test_power_chunks[22, :].tolist() 
image = RecurrencePlot().fit_transform([serie])[0]

# Visualizing Serie/RP side by side
fig = plt.figure(figsize=(15,5))
ax = fig.add_subplot(1, 2, 1)
ax.imshow(image, origin="lower", aspect="auto")
ax = fig.add_subplot(1, 2, 2)
ax.plot(range(len(serie)), serie)
fig.tight_layout() # Or equivalently,  "plt.tight_layout()"



In [13]:
print("Processing test dataset (Series to Images)...")

image_size_px = 32
image_size_inch = round(image_size_px / 71, 2) # Convert px to inch

fig = plt.figure(frameon=False)
fig.set_size_inches(image_size_inch, image_size_inch) # 0.45inch = 32px

rp_series_test = []

# Serie -> RP
X_rp = RecurrencePlot().fit_transform(test_power_chunks)

#for idx, row in tqdm_notebook(df_power_chunks.iterrows(), total = df_power_chunks.shape[0]):
for idx, power_chunk in tqdm_notebook(enumerate(test_power_chunks), total = test_power_chunks.shape[0]):

    serie = power_chunk
    image = X_rp[idx]
    labels = test_labels_binary[idx, :].astype('str').tolist()
    labels_str = ''.join(labels)
 
    # Persist the raw RP matrix (.npy)
    np.save(
        os.path.join( 
            HYPOTHESIS_RESOURCES_PATH, 
            "{}_{}_test.npy".format(idx, labels_str) 
        ), 
        image
    )
    # Save the colored RP image (matplotlib's default colormap)
    imsave(
        os.path.join( 
            HYPOTHESIS_RESOURCES_PATH, 
            "{}_{}_test_color.png".format(idx, labels_str) 
        ), 
        arr=image
    )
#     ax = plt.Axes(fig, [0., 0., 1., 1.])
#     ax.set_axis_off()
#     fig.add_axes(ax)
#     ax.imshow(image, aspect='auto')
#     fig.savefig(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_test_color.png".format(idx, labels_str) ))
    # Clip and cast to 8-bit first: PIL's fromarray cannot handle a raw float64 array
    Image.fromarray((image * 255).clip(0, 255).astype(np.uint8)).convert('RGB').resize((image_size_px, image_size_px)).save(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_test_black.png".format(idx, labels_str) ))
    rp_series_test.append( list([idx]) + list(image.flatten()) + list(labels) )
    
# Visualizing some results...
plt.figure(figsize=(8,6));

ax1 = plt.subplot(121);
plt.title("RP - RGB");
color_rp = plt.imread(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_test_color.png".format(idx, labels_str) ))
plt.imshow(color_rp, origin="lower");

divider = make_axes_locatable(ax1);
cax = divider.append_axes("right", size="2.5%", pad=0.2);
plt.colorbar(cax=cax);

ax2 = plt.subplot(122);
black_rp = plt.imread(os.path.join( HYPOTHESIS_RESOURCES_PATH, "{}_{}_test_black.png".format(idx, labels_str) ))
plt.title("RP - BlackWhite");
plt.imshow(black_rp, origin="lower");

print('Saving processed data...')
df_rp_test = pd.DataFrame(
    data = rp_series_test,
    columns = list(["IDX"]) + ["DIMESION_{}".format(d) for d in range(len(image.flatten()))] + list(label_columns_idx)
)
df_rp_test.to_csv(os.path.join( HYPOTHESIS_RESOURCES_PATH, "df_rp_test.csv"))


Processing test dataset (Series to Images)...
Saving processed data...
<Figure size 32.4x32.4 with 0 Axes>

Modeling


In [5]:
def metrics(test, predicted):
    ##CLASSIFICATION METRICS

    acc = accuracy_score(test, predicted)
    prec = precision_score(test, predicted)
    rec = recall_score(test, predicted)    
    f1 = f1_score(test, predicted)
    f1m = f1_score(test, predicted, average='macro')
    

    # print('f1:',f1)
    # print('acc: ',acc)
    # print('recall: ',rec)
    # print('precision: ',prec)

    # # to copy paste print
    #print("{:.4}\t{:.4}\t{:.4}\t{:.4}\t{:.4}".format(acc, prec, rec, f1, f1m))

    # ##REGRESSION METRICS
    # mae = mean_absolute_error(test_Y,pred)
    # print('mae: ',mae)
    # E_pred = sum(pred)
    # E_ground = sum(test_Y)
    # rete = abs(E_pred-E_ground)/float(max(E_ground,E_pred))
    # print('relative error total energy: ',rete)
    return acc, prec, rec, f1, f1m
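As a quick illustration of what metrics returns, a hand-computed toy case (not from the notebook's data):

# Toy example: 4 samples, one ON state missed by the classifier
acc, prec, rec, f1, f1m = metrics([1, 0, 1, 1], [1, 0, 0, 1])
print(acc)   # 0.75   (3 of 4 samples correct)
print(prec)  # 1.0    (no false positives)
print(rec)   # ~0.667 (2 of 3 ON states recovered)
print(f1m)   # ~0.733 (macro average of per-class F1: 0.8 and ~0.667)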


def plot_predicted_and_ground_truth(test, predicted):
    #import matplotlib.pyplot as plt
    plt.plot(predicted.flatten(), label = 'pred')
    plt.plot(test.flatten(), label= 'Y')
    plt.show()
    return

def embedding_images(images, model):
    
    # Feature extraction process with VGG16
    vgg16_feature_list = [] # Attributes array (vgg16 embedding)
    y = [] # Labels, parsed from the image file names

    for path in tqdm_notebook(images):

        img = keras_image.load_img(path, target_size=(100, 100))
        x = keras_image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)

        # "Extracting" features...
        vgg16_feature = vgg16_model.predict(x)
        vgg16_feature_np = np.array(vgg16_feature)
        vgg16_feature_list.append(vgg16_feature_np.flatten())

        # Image (chuncked serie) 
        file_name = path.split("\\")[-1].split(".")[0]
        image_labels = [int(l) for l in list(file_name.split("_")[1])]
        y.append(image_labels)

    X = np.array(vgg16_feature_list)
    
    return X, y

Benchmarking (replicating the study)


In [3]:
# Building dnn model (feature extraction)
vgg16_model = VGG16(
    include_top=False, 
    weights='imagenet', 
    input_tensor=None, 
    input_shape=(100, 100, 3), 
    pooling='avg',
    classes=1000
)
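With include_top=False and pooling='avg', the network outputs one globally average-pooled 512-dimensional vector per image (the classes=1000 argument is ignored once the top layers are dropped). A quick shape check (illustrative):

# Each embedded image becomes a 512-d vector (VGG16's last conv block has 512 channels)
dummy = np.zeros((1, 100, 100, 3), dtype='float32')
print(vgg16_model.predict(dummy).shape)  # (1, 512)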

Embedding the Training Images


In [6]:
# Colored recurrence plots generated
images = sorted(glob( 
    os.path.join(
        HYPOTHESIS_RESOURCES_PATH, 
        "*_train_color.png"
    ) 
))
X_train, y_train = embedding_images(images, vgg16_model)

# Data persistence
np.save( os.path.join(HYPOTHESIS_RESOURCES_PATH, 'X_train.npy'), X_train)
np.save( os.path.join(HYPOTHESIS_RESOURCES_PATH, 'y_train.npy'), y_train)



Embedding the Test Images


In [8]:
images = sorted(glob( 
    os.path.join(
        HYPOTHESIS_RESOURCES_PATH, 
        "*_test_color.png"
    ) 
))
X_test, y_test = embedding_images(images, vgg16_model)

# Data persistence
np.save( os.path.join(HYPOTHESIS_RESOURCES_PATH, 'X_test.npy'), X_test)
np.save( os.path.join(HYPOTHESIS_RESOURCES_PATH, 'y_test.npy'), y_test)



Training the Supervised Classifier


In [9]:
# Training supervised classifier
clf = DecisionTreeClassifier(max_depth=15)

# Train classifier
clf.fit(X_train, y_train)

# Save classifier for future use
#joblib.dump(clf, 'Tree'+'-'+device+'-redd-all.joblib')


Out[9]:
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=15,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')
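Since y_train is an (n_samples, 9) binary matrix, the tree is fit in scikit-learn's native multilabel mode, so predict returns one ON/OFF vector per sample. A quick check (illustrative):

# One binary column per appliance, predicted jointly by the single tree
print(np.array(y_train).shape)        # (n_train, 9)
print(clf.predict(X_test[:2]).shape)  # (2, 9)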

Evaluating the Classifier


In [12]:
# Predict test data
y_pred = clf.predict(X_test)

# Print metrics
final_performance = []
y_test = np.array(y_test)
y_pred = np.array(y_pred)

print("")
print("RESULT ANALYSIS\n\n")
print("ON/OFF State Charts")
print("-" * 115)
for i in range(y_test.shape[1]):
    
    fig  = plt.figure(figsize=(15, 2))
    plt.title("Appliance #{}".format( label_columns_idx[i]))
    plt.plot(y_test[:, i].flatten(), label = "True Y")
    plt.plot( y_pred[:, i].flatten(), label = "Predicted Y")
    plt.xlabel('Sample')
    plt.xticks(range(0, y_test.shape[0], 50))
    plt.xlim(0, y_test.shape[0])
    plt.ylabel('Status')
    plt.yticks([0, 1])
    plt.ylim(0,1)
    plt.legend()
    plt.show()
    
    acc, prec, rec, f1, f1m = metrics(y_test[:, i], y_pred[:, i])
    final_performance.append([
        label_columns_idx[i], 
        round(acc*100, 2), 
        round(prec*100, 2), 
        round(rec*100, 2), 
        round(f1*100, 2), 
        round(f1m*100, 2)
    ])

print("-" * 115)
print("")
print("FINAL PERFORMANCE BY APPLIANCE (LABEL):")
df_metrics = pd.DataFrame(
    data = final_performance,
    columns = ["Appliance", "Accuracy", "Precision", "Recall", "F1-score", "F1-macro"]
)
display(df_metrics)

print("")
print("OVERALL AVERAGE PERFORMANCE:")
final_performance = np.mean(np.array(final_performance)[:, 1:].astype(float), axis = 0)
display(pd.DataFrame(
    data = {
        "Metric": ["Accuracy", "Precision", "Recall", "F1-score", "F1-macro"],
        "Result (%)": [round(p, 2) for p in final_performance]
    }
))
# print("-----------------")
# print("Accuracy  : {0:.2f}%".format( final_performance[0] ))
# print("Precision : {0:.2f}%".format( final_performance[1] ))
# print("Recall    : {0:.2f}%".format( final_performance[2] ))
# print("F1-score  : {0:.2f}%".format( final_performance[3] ))
# print("F1-macro  : {0:.2f}%".format( final_performance[4] ))
# print("-----------------")


RESULT ANALYSIS


ON/OFF State Charts
-------------------------------------------------------------------------------------------------------------------
[one ON/OFF state chart per appliance is rendered here: True Y vs. Predicted Y over the test samples]
-------------------------------------------------------------------------------------------------------------------

FINAL PERFORMANCE BY APPLIANCE (LABEL):
      Appliance  Accuracy  Precision  Recall  F1-score  F1-macro
0   APPLIANCE_3     54.55      54.71   57.80     56.21     54.48
1   APPLIANCE_4     60.55      53.30   44.43     48.47     58.25
2   APPLIANCE_6     97.58       0.00    0.00      0.00     49.39
3   APPLIANCE_7     95.40       1.21    8.70      2.13     49.89
4  APPLIANCE_10     98.45      62.86   55.00     58.67     78.94
5  APPLIANCE_11     96.22      40.18   34.88     37.34     67.70
6  APPLIANCE_13     98.68       9.52   21.05     13.11     56.22
7  APPLIANCE_17     97.25       3.90    7.69      5.17     51.89
8  APPLIANCE_19     88.15      26.74    5.30      8.85     51.25
OVERALL AVERAGE PERFORMANCE:
      Metric  Result (%)
0   Accuracy       87.43
1  Precision       28.05
2     Recall       26.09
3   F1-score       25.55
4   F1-macro       57.56

Conclusions

In the NILM context, adopting recurrence plots for time-series classification outperformed the GAF method (benchmark 2).

The next step is to structure a single comparison pipeline for the 3 approaches, covering everything from pre-processing to results analysis.

